# -*- coding: utf-8 -*-
# @Time : 2023/12/11 10:28
# @Author : 韩宗森
# @File : 18_解析百度一下
# @Project : pythonProject


from lxml import etree
import urllib.request

# (1) Fetch the page source
# (2) Parse the server's response with etree.HTML
# (3) Print the extracted data

# url = 'https://www.baidu.com/'

# Page to download and parse.
url = 'https://sc.chinaz.com/tupian/meinvtupian.html'
# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36'
}

# Build a customized request object carrying the URL and the headers.
request = urllib.request.Request(url=url, headers=headers)

# Send the request. The context manager closes the HTTP response (and its
# underlying connection) as soon as the body has been read, instead of
# leaving it open until interpreter exit.
with urllib.request.urlopen(request) as response:
    # Read the raw body and decode it as UTF-8 to obtain the page source.
    content = response.read().decode('utf-8')

print(content)

# Parse the downloaded source into an element tree so it can be queried
# with XPath.
tree = etree.HTML(content)

# xpath() returns a list. This expression collects the `alt` attribute of
# every <img> that is a direct child of a <div> directly inside the <div>
# whose class attribute is exactly "tupian-list com-img-txt-list".
result = tree.xpath('//div[@class="tupian-list com-img-txt-list"]/div/img/@alt')

print(result)